Bankruptcy prediction project¶

Asimina Tzana AIVC21015 aivc21015@uniwa.gr Part 2

In [1]:
#import libraries
import os

import matplotlib.pyplot as plt
import openpyxl
import seaborn as sns
import xlrd
import plotly.express as px
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from imblearn import under_sampling

from keras.models import Sequential
from keras.layers import Dropout
from keras.layers import Dense
In [2]:
# read the data
try:
 # Confirm file exists.
 df = pd.read_excel("InputData\\Dataset.xlsx")
 print("Column headings:")
 print(df.columns)
except FileNotFoundError:
 print(FileNotFoundError)
Column headings:
Index(['365* ( Β.Υ / Κοστ.Πωλ )', 'Λειτ.Αποτ/Συν.Ενεργ. (ROA)',
       'ΧΡΗΜ.ΔΑΠΑΝΕΣ / ΠΩΛΗΣΕΙΣ',
       ' ΠΡΑΓΜΑΤΙΚΗ ΡΕΥΣΤΟΤΗΤΑ :  (ΚΕ-ΑΠΟΘΕΜΑΤΑ) / Β.Υ', '(ΑΠΑΙΤ.*365) / ΠΩΛ.',
       'Συν.Υποχρ/Συν.Ενεργ', 'Διάρκεια Παραμονής Αποθεμάτων',
       'Λογαριθμος Προσωπικού', 'ΕΝΔΕΙΞΗ ΕΞΑΓΩΓΩΝ', 'ΕΝΔΕΙΞΗ ΕΙΣΑΓΩΓΩΝ',
       'ΕΝΔΕΙΞΗ ΑΝΤΙΠΡΟΣΩΠΕΙΩΝ', 'ΕΝΔΕΙΞΗ ΑΣΥΝΕΠΕΙΑΣ (=2) (ν+1)', 'ΕΤΟΣ'],
      dtype='object')
In [3]:
#Plot original data
fig = px.pie(df, values='ΕΝΔΕΙΞΗ ΑΣΥΝΕΠΕΙΑΣ (=2) (ν+1)', names='ΕΝΔΕΙΞΗ ΑΣΥΝΕΠΕΙΑΣ (=2) (ν+1)')
fig.show()
In [4]:
inputData= df[df.columns[0:10]].values
In [5]:
outputData = df['ΕΝΔΕΙΞΗ ΑΣΥΝΕΠΕΙΑΣ (=2) (ν+1)']
outputData , levels=pd.factorize(outputData)
In [6]:
print(' .. we have', inputData.shape[0], 'available paradigms.')
print(' .. each paradigm has', inputData.shape[1], 'features')

print(' ... the distribution for the available class lebels is:')
for classIdx in range(0, len(np.unique(outputData))):
    tmpCount = sum(outputData == classIdx)
    tmpPercentage = tmpCount/len(outputData)
    print(' .. class', str(classIdx), 'has', str(tmpCount), 'instances', '(', '{:.2f}'.format(tmpPercentage), '%)')
 .. we have 10716 available paradigms.
 .. each paradigm has 10 features
 ... the distribution for the available class lebels is:
 .. class 0 has 10468 instances ( 0.98 %)
 .. class 1 has 248 instances ( 0.02 %)
In [7]:
#Split data into Training and Testing Sets 
X_train, X_test, y_train, y_test = train_test_split(inputData, outputData, random_state=0)
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
In [8]:
#Perform undersample techique
failed=0
non_failed=0
for i in range(len(y_train)):
    if y_train[i]==1:
        failed=failed+1
print("Companies That Went Bankrupt in training set",failed)
for i in range(len(y_train)):
    if y_train[i]==0:
        non_failed=non_failed+1
print("Healthy companies in training",non_failed)

synolo2=0
synolo1=0
for i in range(len(y_test)):
    if y_test[i]==1:
        synolo2=synolo2+1
print("Healthy companies in test set",synolo2)
for i in range(len(y_test)):
    if y_test[i]==0:
        synolo1=synolo1+1
print("Companies That Went Bankrupt in test set",synolo1)


#analogy 3:1 for training set
rus = under_sampling.RandomUnderSampler(
    sampling_strategy={
        0: failed*3,
        1: failed,

    },
    random_state=42
)

failed=0
non_failed=0

#analogy 3:1 for test set
rus2 = under_sampling.RandomUnderSampler(
    sampling_strategy={
        0: synolo2*3,
        1: synolo2,

    },
    random_state=42
)
Companies That Went Bankrupt in training set 173
Healthy companies in training 7864
Healthy companies in test set 75
Companies That Went Bankrupt in test set 2604
In [9]:
X_train_new,y_train_new=rus.fit_resample(X_train,y_train)
X_Test_new,y_test_new=rus2.fit_resample(X_test,y_test)
In [10]:
failed=0
non_failed=0
for i in range(len(y_train_new)):
    if y_train_new[i]==1:
        failed=failed+1
print("Healthy companies in training set",failed)
for i in range(len(y_train_new)):
    if y_train_new[i]==0:
        non_failed=non_failed+1
print("Healthy companies in training set",non_failed)

synolo2=0
synolo1=0
for i in range(len(y_test_new)):
    if y_test_new[i]==1:
        synolo2=synolo2+1
print("Healthy companies in test set",synolo2)
for i in range(len(y_test_new)):
    if y_test_new[i]==0:
        synolo1=synolo1+1
print("Healthy companies in test set",synolo1)

non_healthy_training = failed
non_healthy_test = synolo2
Healthy companies in training set 173
Healthy companies in training set 519
Healthy companies in test set 75
Healthy companies in test set 225
In [11]:
y_train_new_df = pd.DataFrame(y_train_new, columns = ['Label'])
y_train_labels = y_train_new_df.value_counts()
#Plot new data
fig = px.pie(y_train_new_df, names='Label', title='Data after undersampling with 3:1')
fig.show()

Linear Discriminant Analysis¶

In [12]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis()
lda.fit(X_train_new, y_train_new) #fit the model using the training data
#now check for both train and test data, how well the model learned the patterns
lda_y_pred_train = lda.predict(X_train_new)
lda_y_pred_test = lda.predict(X_Test_new)
In [13]:
#calculate the scores
# now check for both train and test data, how well the model learned the patterns
acc_train_lda = accuracy_score(y_train_new, lda_y_pred_train)
acc_test_lda = accuracy_score(y_test_new, lda_y_pred_test)
pre_train_lda = precision_score(y_train_new, lda_y_pred_train,zero_division = 0, average='binary')
pre_test_lda = precision_score(y_test_new, lda_y_pred_test, average='binary')
rec_train_lda = recall_score(y_train_new, lda_y_pred_train, average='binary')
rec_test_lda = recall_score(y_test_new, lda_y_pred_test, average='binary')
f1_train_lda = f1_score(y_train_new, lda_y_pred_train, average='binary')
f1_test_lda = f1_score(y_test_new, lda_y_pred_test, average='binary')
In [14]:
# print the scores
print('Accuracy scores of Linear Discriminant Analysis classifier are:','train: {:.2f}'.format(acc_train_lda), 'and test:{:.2f}.'.format(acc_test_lda))
print('Precision scores of Linear Discriminant Analysis classifier are:','train: {:.2f}'.format(pre_train_lda), 'and test:{:.2f}.'.format(pre_test_lda))
print('Recall scores of Linear Discriminant Analysis classifier are:','train: {:.2f}'.format(rec_train_lda), 'and test:{:.2f}.'.format(rec_test_lda))
print('F1 scores of Linear Discriminant Analysis classifier are:','train: {:.2f}'.format(f1_train_lda), 'and test: {:.2f}.'.format(f1_test_lda))
Accuracy scores of Linear Discriminant Analysis classifier are: train: 0.82 and test:0.77.
Precision scores of Linear Discriminant Analysis classifier are: train: 0.70 and test:0.55.
Recall scores of Linear Discriminant Analysis classifier are: train: 0.48 and test:0.40.
F1 scores of Linear Discriminant Analysis classifier are: train: 0.57 and test: 0.46.
In [15]:
#Classification report for test set
print('                 LDA Test set classification report')
print(classification_report(y_test_new, lda_y_pred_test))
                 LDA Test set classification report
              precision    recall  f1-score   support

           0       0.82      0.89      0.85       225
           1       0.55      0.40      0.46        75

    accuracy                           0.77       300
   macro avg       0.68      0.64      0.66       300
weighted avg       0.75      0.77      0.75       300

In [16]:
#Classification report for train set
print('                 LDA Train set classification report')
print(classification_report(y_train_new, lda_y_pred_train))
                 LDA Train set classification report
              precision    recall  f1-score   support

           0       0.84      0.93      0.89       519
           1       0.70      0.48      0.57       173

    accuracy                           0.82       692
   macro avg       0.77      0.71      0.73       692
weighted avg       0.81      0.82      0.81       692

In [17]:
plt.rcParams["figure.figsize"] = (12,6)


plt.scatter(X_Test_new[lda_y_pred_test==0, 0] , X_Test_new[lda_y_pred_test==0, 1],c='m',marker='o',s=20,label='Class 0' )
plt.scatter(X_Test_new[lda_y_pred_test==1, 0] , X_Test_new[lda_y_pred_test==1, 1],c='b',marker='x',label='Class 1' )
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
plt.legend(loc='best')
plt.title('Linear Discriminant Analysis')
plt.show()
In [18]:
# Creates a confusion matrix
cm_lda_train = confusion_matrix(y_train_new, lda_y_pred_train)
#plots the confusion matrix

plt.figure(figsize=(6,5))
sns.heatmap(cm_lda_train, annot=True, fmt=".1f")
plt.title('Linear Discriminant Analysis Train\nAccuracy:{0:.3f}'.format(accuracy_score(y_train_new, lda_y_pred_train)))
plt.ylabel('True label')
plt.xlabel('Predicted label')


plt.show()
In [19]:
# Creates a confusion matrix
cm_lda_test = confusion_matrix(y_test_new, lda_y_pred_test)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_lda_test, annot=True, fmt=".1f")
plt.title('Linear Discriminant Analysis Test\nAccuracy:{0:.3f}'.format(accuracy_score(y_test_new, lda_y_pred_test)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [20]:
#Print TP FP FN TN
#training 
TP_lda_train = cm_lda_train[1][1]
FP_lda_train = cm_lda_train[1][0]
FN_lda_train = cm_lda_train[0][1]
TN_lda_train = cm_lda_train[0][0]
print('TP is : {:.2f}.'.format(TP_lda_train))
print('FP is : {:.2f}.'.format(FP_lda_train))
print('FN is : {:.2f}.'.format(FN_lda_train))
print('TN is : {:.2f}.'.format(TN_lda_train))

#test
TP_lda_test = cm_lda_test[1][1]
FP_lda_test = cm_lda_test[1][0]
FN_lda_test = cm_lda_test[0][1]
TN_lda_test = cm_lda_test[0][0]
print('TP is : {:.2f}.'.format(TP_lda_test))
print('FP is : {:.2f}.'.format(FP_lda_test))
print('FN is : {:.2f}.'.format(FN_lda_test))
print('TN is : {:.2f}.'.format(TN_lda_test))
TP is : 83.00.
FP is : 90.00.
FN is : 35.00.
TN is : 484.00.
TP is : 30.00.
FP is : 45.00.
FN is : 25.00.
TN is : 200.00.
In [21]:
#Now the normalize the diagonal entries
cm_lda_full = cm_lda_test.astype('float') / cm_lda_test.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_lda_full.diagonal()
Out[21]:
array([0.88888889, 0.4       ])
In [22]:
#Now the normalize the diagonal entries
cm_lda_full = cm_lda_train.astype('float') / cm_lda_train.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_lda_full.diagonal()
Out[22]:
array([0.93256262, 0.47976879])

Logistic Regression¶

In [23]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()
logreg.fit(X_train_new, y_train_new) #fit the model using the training data
#now check for both train and test data, how well the model learned the patterns
log_y_pred_train = logreg.predict(X_train_new)
log_y_pred_test = logreg.predict(X_Test_new)
In [24]:
#calculate the scores
# now check for both train and test data, how well the model learned the patterns
acc_train_log = accuracy_score(y_train_new, log_y_pred_train)
acc_test_log = accuracy_score(y_test_new, log_y_pred_test)
pre_train_log = precision_score(y_train_new, log_y_pred_train,zero_division = 0, average='macro')
pre_test_log = precision_score(y_test_new, log_y_pred_test,zero_division = 0, average='macro')
rec_train_log = recall_score(y_train_new, log_y_pred_train, average='macro')
rec_test_log = recall_score(y_test_new, log_y_pred_test, average='macro')
f1_train_log = f1_score(y_train_new, log_y_pred_train, average='macro')
f1_test_log = f1_score(y_test_new, log_y_pred_test, average='macro')
In [25]:
# print the scores
print('Accuracy scores of Logistic Regression classifier are:','train: {:.2f}'.format(acc_train_log), 'and test:{:.2f}.'.format(acc_test_log))
print('Precision scores of Logistic Regression classifier are:','train: {:.2f}'.format(pre_train_log), 'and test:{:.2f}.'.format(pre_test_log))
print('Recall scores of Logistic Regression classifier are:','train: {:.2f}'.format(rec_train_log), 'and test:{:.2f}.'.format(rec_test_log))
print('F1 scores of Logistic Regression classifier are:','train: {:.2f}'.format(f1_train_log), 'and test: {:.2f}.'.format(f1_test_log))
Accuracy scores of Logistic Regression classifier are: train: 0.80 and test:0.77.
Precision scores of Logistic Regression classifier are: train: 0.76 and test:0.69.
Recall scores of Logistic Regression classifier are: train: 0.66 and test:0.64.
F1 scores of Logistic Regression classifier are: train: 0.69 and test: 0.65.
In [26]:
print('                LR Train set classification report')
print(classification_report(y_train_new, log_y_pred_train))
                LR Train set classification report
              precision    recall  f1-score   support

           0       0.82      0.95      0.88       519
           1       0.70      0.38      0.49       173

    accuracy                           0.80       692
   macro avg       0.76      0.66      0.69       692
weighted avg       0.79      0.80      0.78       692

In [27]:
print('                LR Test set classification report')
print(classification_report(y_test_new, log_y_pred_test))
                LR Test set classification report
              precision    recall  f1-score   support

           0       0.81      0.91      0.86       225
           1       0.57      0.36      0.44        75

    accuracy                           0.77       300
   macro avg       0.69      0.64      0.65       300
weighted avg       0.75      0.77      0.75       300

In [28]:
# Creates a confusion matrix
cm_log_train = confusion_matrix(y_train_new, log_y_pred_train)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_log_train, annot=True, fmt=".1f")
plt.title('Logistic Regression Train set\nAccuracy:{0:.3f}'.format(accuracy_score(y_train_new, log_y_pred_train)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [29]:
# Creates a confusion matrix
cm_log_test = confusion_matrix(y_test_new, log_y_pred_test)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_log_test, annot=True, fmt=".1f")
plt.title('Logistic Regression Test set\nAccuracy:{0:.3f}'.format(accuracy_score(y_test_new, log_y_pred_test)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [30]:
#training 
TP_log_train = cm_log_train[1][1]
FP_log_train = cm_log_train[1][0]
FN_log_train = cm_log_train[0][1]
TN_log_train = cm_log_train[0][0]
print('TP is : {:.2f}.'.format(TP_log_train))
print('FP is : {:.2f}.'.format(FP_log_train))
print('FN is : {:.2f}.'.format(FN_log_train))
print('TN is : {:.2f}.'.format(TN_log_train))

#test
TP_log_test = cm_log_test[1][1]
FP_log_test = cm_log_test[1][0]
FN_log_test = cm_log_test[0][1]
TN_log_test = cm_log_test[0][0]
print('TP is : {:.2f}.'.format(TP_log_test))
print('FP is : {:.2f}.'.format(FP_log_test))
print('FN is : {:.2f}.'.format(FN_log_test))
print('TN is : {:.2f}.'.format(TN_log_test))
TP is : 66.00.
FP is : 107.00.
FN is : 28.00.
TN is : 491.00.
TP is : 27.00.
FP is : 48.00.
FN is : 20.00.
TN is : 205.00.
In [31]:
#Now the normalize the diagonal entries
cm_log_train_full = cm_log_train.astype('float') / cm_log_train.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_log_train_full.diagonal()
Out[31]:
array([0.9460501 , 0.38150289])
In [32]:
#Now the normalize the diagonal entries
cm_log_full_test = cm_log_test.astype('float') / cm_log_test.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_log_full_test.diagonal()
Out[32]:
array([0.91111111, 0.36      ])
In [33]:
plt.scatter(X_Test_new[log_y_pred_test==0, 0] , X_Test_new[log_y_pred_test==0, 1],c='m',marker='o',s=20,label='Class 0' )
plt.scatter(X_Test_new[log_y_pred_test==1, 0] , X_Test_new[log_y_pred_test==1, 1],c='b',marker='x',label='Class 1' )
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
plt.legend(loc='best')
plt.title('Logistic Regression')
plt.show()

Decision Trees¶

In [34]:
#Decision Trees
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()
clf.fit(X_train_new, y_train_new) #fit the model using the training data
#now check for both train and test data, how well the model learned the patterns
y_pred_dt_train = clf.predict(X_train_new)
y_pred_dt_test = clf.predict(X_Test_new)
In [35]:
#calculate the scores
acc_train_dt = accuracy_score(y_train_new, y_pred_dt_train)
acc_test_dt = accuracy_score(y_test_new, y_pred_dt_test)
pre_train_dt = precision_score(y_train_new, y_pred_dt_train, average='macro')
pre_test_dt = precision_score(y_test_new, y_pred_dt_test, average='macro')
rec_train_dt = recall_score(y_train_new, y_pred_dt_train, average='macro')
rec_test_dt = recall_score(y_test_new, y_pred_dt_test, average='macro')
f1_train_dt = f1_score(y_train_new, y_pred_dt_train, average='macro')
f1_test_dt = f1_score(y_test_new, y_pred_dt_test, average='macro')
In [36]:
print('Accuracy scores of Decision Tree classifier are:',
      'train: {:.2f}'.format(acc_train_dt), 'and test: {:.2f}.'.format(acc_test_dt))
print('Precision scores of Decision Tree classifier are:',
      'train: {:.2f}'.format(pre_train_dt), 'and test: {:.2f}.'.format(pre_test_dt))
print('Recall scores of Decision Tree classifier are:',
      'train: {:.2f}'.format(rec_train_dt), 'and test: {:.2f}.'.format(rec_test_dt))
print('F1 scores of Decision Tree classifier are:',
      'train: {:.2f}'.format(f1_train_dt), 'and test: {:.2f}.'.format(f1_test_dt))
Accuracy scores of Decision Tree classifier are: train: 1.00 and test: 0.75.
Precision scores of Decision Tree classifier are: train: 1.00 and test: 0.68.
Recall scores of Decision Tree classifier are: train: 1.00 and test: 0.70.
F1 scores of Decision Tree classifier are: train: 1.00 and test: 0.68.
In [37]:
from sklearn.metrics import classification_report
print('                DT Train set classification report')
print(classification_report(y_train_new, y_pred_dt_train))
                DT Train set classification report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       519
           1       1.00      1.00      1.00       173

    accuracy                           1.00       692
   macro avg       1.00      1.00      1.00       692
weighted avg       1.00      1.00      1.00       692

In [38]:
print('                DT Test set classification report')
print(classification_report(y_test_new, y_pred_dt_test))
                DT Test set classification report
              precision    recall  f1-score   support

           0       0.85      0.80      0.83       225
           1       0.50      0.59      0.54        75

    accuracy                           0.75       300
   macro avg       0.68      0.70      0.68       300
weighted avg       0.77      0.75      0.76       300

In [39]:
# Creates a confusion matrix
cm_dt_train = confusion_matrix(y_train_new, y_pred_dt_train)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_dt_train, annot=True, fmt=".1f")
plt.title('Decision Trees Train set\nAccuracy:{0:.3f}'.format(accuracy_score(y_train_new, y_pred_dt_train)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [40]:
# Creates a confusion matrix
cm_dt_test = confusion_matrix(y_test_new, y_pred_dt_test)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_dt_test, annot=True, fmt=".1f")
plt.title('Decision Trees Test set\nAccuracy:{0:.3f}'.format(accuracy_score(y_test_new, y_pred_dt_test)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [41]:
#training 
TP_dt_train = cm_dt_train[1][1]
FP_dt_train = cm_dt_train[1][0]
FN_dt_train = cm_dt_train[0][1]
TN_dt_train = cm_dt_train[0][0]
print('TP is : {:.2f}.'.format(TP_dt_train))
print('FP is : {:.2f}.'.format(FP_dt_train))
print('FN is : {:.2f}.'.format(FN_dt_train))
print('TN is : {:.2f}.'.format(TN_dt_train))

#test
TP_dt_test = cm_dt_test[1][1]
FP_dt_test = cm_dt_test[1][0]
FN_dt_test = cm_dt_test[0][1]
TN_dt_test = cm_dt_test[0][0]
print('TP is : {:.2f}.'.format(TP_dt_test))
print('FP is : {:.2f}.'.format(FP_dt_test))
print('FN is : {:.2f}.'.format(FN_dt_test))
print('TN is : {:.2f}.'.format(TN_dt_test))
TP is : 173.00.
FP is : 0.00.
FN is : 0.00.
TN is : 519.00.
TP is : 44.00.
FP is : 31.00.
FN is : 44.00.
TN is : 181.00.
In [42]:
#Now the normalize the diagonal entries
cm_full_train = cm_dt_train.astype('float') / cm_dt_train.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_full_train.diagonal()
Out[42]:
array([1., 1.])
In [43]:
#Now the normalize the diagonal entries
cm_full_test = cm_dt_test.astype('float') / cm_dt_test.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_full_test.diagonal()
Out[43]:
array([0.80444444, 0.58666667])
In [44]:
plt.scatter(X_Test_new[y_pred_dt_test==0, 0] , X_Test_new[y_pred_dt_test==0, 1],c='m',marker='o',s=20,label='Class 0' )
plt.scatter(X_Test_new[y_pred_dt_test==1, 0] , X_Test_new[y_pred_dt_test==1, 1],c='b',marker='x',label='Class 1' )
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
plt.legend(loc='best')
plt.title('Decision Trees')
plt.show()

k-Nearest Neighbors¶

In [45]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train_new, y_train_new) #fit the model using the training data
#now check for both train and test data, how well the model learned the patterns
knn_y_pred_train = knn.predict(X_train_new)
knn_y_pred_test = knn.predict(X_Test_new)
In [46]:
#calculate the scores
knn_acc_train_dt = accuracy_score(y_train_new, knn_y_pred_train)
knn_acc_test_dt = accuracy_score(y_test_new, knn_y_pred_test)
knn_pre_train_dt = precision_score(y_train_new, knn_y_pred_train, average='macro')
knn_pre_test_dt = precision_score(y_test_new, knn_y_pred_test, average='macro')
knn_rec_train_dt = recall_score(y_train_new, knn_y_pred_train, average='macro')
knn_rec_test_dt = recall_score(y_test_new, knn_y_pred_test, average='macro')
knn_f1_train_dt = f1_score(y_train_new, knn_y_pred_train, average='macro')
knn_f1_test_dt = f1_score(y_test_new, knn_y_pred_test, average='macro')
In [47]:
print('Accuracy scores of k-Nearest Neighbors classifier are:',
      'train: {:.2f}'.format(knn_acc_train_dt), 'and test: {:.2f}.'.format(knn_acc_test_dt))
print('Precision scores of k-Nearest Neighbors classifier are:',
      'train: {:.2f}'.format(knn_pre_train_dt), 'and test: {:.2f}.'.format(knn_pre_test_dt))
print('Recall scores of k-Nearest Neighbors classifier are:',
      'train: {:.2f}'.format(knn_rec_train_dt), 'and test: {:.2f}.'.format(knn_rec_test_dt))
print('F1 scores of k-Nearest Neighbors classifier are:',
      'train: {:.2f}'.format(knn_f1_train_dt), 'and test: {:.2f}.'.format(knn_f1_test_dt))
Accuracy scores of k-Nearest Neighbors classifier are: train: 0.86 and test: 0.75.
Precision scores of k-Nearest Neighbors classifier are: train: 0.83 and test: 0.65.
Recall scores of k-Nearest Neighbors classifier are: train: 0.77 and test: 0.62.
F1 scores of k-Nearest Neighbors classifier are: train: 0.80 and test: 0.63.
In [48]:
print('                KNN Test set classification report')
print(classification_report(y_test_new, knn_y_pred_test))
                KNN Test set classification report
              precision    recall  f1-score   support

           0       0.80      0.88      0.84       225
           1       0.50      0.35      0.41        75

    accuracy                           0.75       300
   macro avg       0.65      0.62      0.63       300
weighted avg       0.73      0.75      0.73       300

In [49]:
print('                KNN Train set classification report')
print(classification_report(y_train_new, knn_y_pred_train))
                KNN Train set classification report
              precision    recall  f1-score   support

           0       0.88      0.95      0.91       519
           1       0.79      0.60      0.68       173

    accuracy                           0.86       692
   macro avg       0.83      0.77      0.80       692
weighted avg       0.85      0.86      0.85       692

In [50]:
# Creates a confusion matrix
cm_knn_train = confusion_matrix(y_train_new, knn_y_pred_train)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_knn_train, annot=True, fmt=".1f")
plt.title('k-Nearest Neighbors Train set\nAccuracy:{0:.3f}'.format(accuracy_score(y_train_new, knn_y_pred_train)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [51]:
# Creates a confusion matrix
cm_knn_test = confusion_matrix(y_test_new, knn_y_pred_test)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_knn_test, annot=True, fmt=".1f")
plt.title('k-Nearest Neighbors Test set\nAccuracy:{0:.3f}'.format(accuracy_score(y_test_new, knn_y_pred_test)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [52]:
#training 
TP_knn_train = cm_knn_train[1][1]
FP_knn_train = cm_knn_train[1][0]
FN_knn_train = cm_knn_train[0][1]
TN_knn_train = cm_knn_train[0][0]
print('TP is : {:.2f}.'.format(TP_knn_train))
print('FP is : {:.2f}.'.format(FP_knn_train))
print('FN is : {:.2f}.'.format(FN_knn_train))
print('TN is : {:.2f}.'.format(TN_knn_train))

#test
TP_knn_test = cm_knn_test[1][1]
FP_knn_test = cm_knn_test[1][0]
FN_knn_test = cm_knn_test[0][1]
TN_knn_test = cm_knn_test[0][0]
print('TP is : {:.2f}.'.format(TP_knn_test))
print('FP is : {:.2f}.'.format(FP_knn_test))
print('FN is : {:.2f}.'.format(FN_knn_test))
print('TN is : {:.2f}.'.format(TN_knn_test))
TP is : 104.00.
FP is : 69.00.
FN is : 28.00.
TN is : 491.00.
TP is : 26.00.
FP is : 49.00.
FN is : 26.00.
TN is : 199.00.
In [53]:
#Now the normalize the diagonal entries
cm_knn_train_full = cm_knn_train.astype('float') / cm_knn_train.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_knn_train_full.diagonal()
Out[53]:
array([0.9460501 , 0.60115607])
In [54]:
#Now the normalize the diagonal entries
cm_knn_train_full = cm_knn_test.astype('float') / cm_knn_test.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_knn_train_full.diagonal()
Out[54]:
array([0.88444444, 0.34666667])
In [55]:
plt.scatter(X_Test_new[knn_y_pred_test==0, 0] , X_Test_new[knn_y_pred_test==0, 1],c='m',marker='o',s=20,label='Class 0' )
plt.scatter(X_Test_new[knn_y_pred_test==1, 0] , X_Test_new[knn_y_pred_test==1, 1],c='b',marker='x',label='Class 1' )
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
plt.legend(loc='best')
plt.title('k-Nearest Neighbors')
plt.show()

Naïve Bayes¶

In [56]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train_new, y_train_new) #fit the model using the training data
# now check for both train and test data, how well the model learned the patterns
nb_y_pred_train = gnb.predict(X_train_new)
nb_y_pred_test = gnb.predict(X_Test_new)
In [57]:
#calculate the scores
nb_acc_train = accuracy_score(y_train_new, nb_y_pred_train)
nb_acc_test = accuracy_score(y_test_new, nb_y_pred_test)
nb_pre_train = precision_score(y_train_new, nb_y_pred_train, average='macro')
nb_pre_test = precision_score(y_test_new, nb_y_pred_test, average='macro')
nb_rec_train = recall_score(y_train_new, nb_y_pred_train, average='macro')
nb_rec_test = recall_score(y_test_new, nb_y_pred_test, average='macro')
nb_f1_train = f1_score(y_train_new, nb_y_pred_train, average='macro')
nb_f1_test = f1_score(y_test_new, nb_y_pred_test, average='macro')
In [58]:
print('Accuracy scores of Naïve Bayes classifier are:',
      'train: {:.2f}'.format(nb_acc_train), 'and test: {:.2f}.'.format(nb_acc_test))
print('Precision scores of Naïve Bayes classifier are:',
      'train: {:.2f}'.format(nb_pre_train), 'and test: {:.2f}.'.format(nb_pre_test))
print('Recall scores of Naïve Bayes classifier are:',
      'train: {:.2f}'.format(nb_rec_train), 'and test: {:.2f}.'.format(nb_rec_test))
print('F1 scores of Naïve Bayes classifier are:',
      'train: {:.2f}'.format(nb_f1_train), 'and test: {:.2f}.'.format(nb_f1_test))
Accuracy scores of Naïve Bayes classifier are: train: 0.77 and test: 0.78.
Precision scores of Naïve Bayes classifier are: train: 0.69 and test: 0.71.
Recall scores of Naïve Bayes classifier are: train: 0.67 and test: 0.71.
F1 scores of Naïve Bayes classifier are: train: 0.68 and test: 0.71.
In [59]:
print('            Naïve Bayes Test set classification report')
print(classification_report(y_test_new, nb_y_pred_test))
            Naïve Bayes Test set classification report
              precision    recall  f1-score   support

           0       0.85      0.86      0.86       225
           1       0.57      0.56      0.56        75

    accuracy                           0.78       300
   macro avg       0.71      0.71      0.71       300
weighted avg       0.78      0.78      0.78       300

In [60]:
print('            Naïve Bayes Train set classification report')
print(classification_report(y_train_new, nb_y_pred_train))
            Naïve Bayes Train set classification report
              precision    recall  f1-score   support

           0       0.83      0.87      0.85       519
           1       0.55      0.47      0.51       173

    accuracy                           0.77       692
   macro avg       0.69      0.67      0.68       692
weighted avg       0.76      0.77      0.77       692

In [61]:
# Creates a confusion matrix
cm_nb_train = confusion_matrix(y_train_new, nb_y_pred_train)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_nb_train, annot=True, fmt=".1f")
plt.title('Naïve Bayes Train set\nAccuracy:{0:.3f}'.format(accuracy_score(y_train_new, nb_y_pred_train)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [62]:
# Creates a confusion matrix
cm_nb_test = confusion_matrix(y_test_new, nb_y_pred_test)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_nb_test, annot=True, fmt=".1f")
plt.title('Naïve Bayes Test set\nAccuracy:{0:.3f}'.format(accuracy_score(y_test_new, nb_y_pred_test)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [63]:
#training 
TP_nb_train = cm_nb_train[1][1]
FP_nb_train = cm_nb_train[1][0]
FN_nb_train = cm_nb_train[0][1]
TN_nb_train = cm_nb_train[0][0]
print('TP is : {:.2f}.'.format(TP_nb_train))
print('FP is : {:.2f}.'.format(FP_nb_train))
print('FN is : {:.2f}.'.format(FN_nb_train))
print('TN is : {:.2f}.'.format(TN_nb_train))

#test
TP_nb_test = cm_nb_test[1][1]
FP_nb_test = cm_nb_test[1][0]
FN_nb_test = cm_nb_test[0][1]
TN_nb_test = cm_nb_test[0][0]
print('TP is : {:.2f}.'.format(TP_nb_test))
print('FP is : {:.2f}.'.format(FP_nb_test))
print('FN is : {:.2f}.'.format(FN_nb_test))
print('TN is : {:.2f}.'.format(TN_nb_test))
TP is : 82.00.
FP is : 91.00.
FN is : 66.00.
TN is : 453.00.
TP is : 42.00.
FP is : 33.00.
FN is : 32.00.
TN is : 193.00.
In [64]:
#Now the normalize the diagonal entries
cm_nb_train_full = cm_nb_train.astype('float') / cm_nb_train.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_nb_train_full.diagonal()
Out[64]:
array([0.87283237, 0.47398844])
In [65]:
#Now the normalize the diagonal entries
cm_nb_test_full = cm_nb_test.astype('float') / cm_nb_test.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_nb_test_full.diagonal()
Out[65]:
array([0.85777778, 0.56      ])
In [66]:
plt.scatter(X_Test_new[nb_y_pred_test==0, 0] , X_Test_new[nb_y_pred_test==0, 1],c='m',marker='o',s=20,label='Class 0' )
plt.scatter(X_Test_new[nb_y_pred_test==1, 0] , X_Test_new[nb_y_pred_test==1, 1],c='b',marker='x',label='Class 1' )
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
plt.legend(loc='best')
plt.title('Naïve Bayes')
plt.show()
In [67]:
#Support Vector Machines
In [68]:
from sklearn.svm import SVC
svm = SVC()
svm.fit(X_train_new, y_train_new) #fit the model using the training data
# now check for both train and test data, how well the model learned the patterns
svm_y_pred_train = svm.predict(X_train_new)
svm_y_pred_test = svm.predict(X_Test_new)
In [69]:
#calculate the scores
svm_acc_train = accuracy_score(y_train_new, svm_y_pred_train)
svm_acc_test = accuracy_score(y_test_new, svm_y_pred_test)
svm_pre_train = precision_score(y_train_new, svm_y_pred_train, average='macro', zero_division=0)
svm_pre_test = precision_score(y_test_new, svm_y_pred_test, average='macro',zero_division=0)
svm_rec_train = recall_score(y_train_new, svm_y_pred_train, average='macro')
svm_rec_test = recall_score(y_test_new, svm_y_pred_test, average='macro')
svm_f1_train = f1_score(y_train_new, svm_y_pred_train, average='macro')
svm_f1_test = f1_score(y_test_new, svm_y_pred_test, average='macro')
In [70]:
print('Accuracy scores of SVM classifier are:',
      'train: {:.2f}'.format(svm_acc_train), 'and test: {:.2f}.'.format(svm_acc_test))
print('Precision scores of SVM classifier are:',
      'train: {:.2f}'.format(svm_pre_train), 'and test: {:.2f}.'.format(svm_pre_test))
print('Recall scores of SVM classifier are:',
      'train: {:.2f}'.format(svm_rec_train), 'and test: {:.2f}.'.format(svm_rec_test))
print('F1 scores of SVM classifier are:',
      'train: {:.2f}'.format(svm_f1_train), 'and test: {:.2f}.'.format(svm_f1_test))
Accuracy scores of SVM classifier are: train: 0.83 and test: 0.78.
Precision scores of SVM classifier are: train: 0.84 and test: 0.70.
Recall scores of SVM classifier are: train: 0.68 and test: 0.62.
F1 scores of SVM classifier are: train: 0.72 and test: 0.63.
In [71]:
print('                SVM Test set classification report')
print(classification_report(y_test_new, svm_y_pred_test))
                SVM Test set classification report
              precision    recall  f1-score   support

           0       0.80      0.93      0.86       225
           1       0.61      0.31      0.41        75

    accuracy                           0.78       300
   macro avg       0.70      0.62      0.63       300
weighted avg       0.75      0.78      0.75       300

In [72]:
print('                SVM Train set classification report')
print(classification_report(y_train_new, svm_y_pred_train))
                SVM Train set classification report
              precision    recall  f1-score   support

           0       0.83      0.98      0.90       519
           1       0.86      0.39      0.53       173

    accuracy                           0.83       692
   macro avg       0.84      0.68      0.72       692
weighted avg       0.84      0.83      0.81       692

In [73]:
# Creates a confusion matrix
cm_svm_test = confusion_matrix(y_test_new, svm_y_pred_test)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_svm_test, annot=True, fmt=".1f")
plt.title('SVM Test set\nAccuracy:{0:.3f}'.format(accuracy_score(y_test_new, svm_y_pred_test)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [74]:
# Creates a confusion matrix
cm_svm_train = confusion_matrix(y_train_new, svm_y_pred_train)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(cm_svm_train, annot=True, fmt=".1f")
plt.title('SVM Train set\nAccuracy:{0:.3f}'.format(accuracy_score(y_train_new, svm_y_pred_train)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [75]:
#training 
TP_svm_train = cm_svm_train[1][1]
FP_svm_train = cm_svm_train[1][0]
FN_svm_train = cm_svm_train[0][1]
TN_svm_train = cm_svm_train[0][0]
print('TP is : {:.2f}.'.format(TP_svm_train))
print('FP is : {:.2f}.'.format(FP_svm_train))
print('FN is : {:.2f}.'.format(FN_svm_train))
print('TN is : {:.2f}.'.format(TN_svm_train))

#test
TP_svm_test = cm_svm_test[1][1]
FP_svm_test = cm_svm_test[1][0]
FN_svm_test = cm_svm_test[0][1]
TN_svm_test = cm_svm_test[0][0]
print('TP is : {:.2f}.'.format(TP_svm_test))
print('FP is : {:.2f}.'.format(FP_svm_test))
print('FN is : {:.2f}.'.format(FN_svm_test))
print('TN is : {:.2f}.'.format(TN_svm_test))
TP is : 67.00.
FP is : 106.00.
FN is : 11.00.
TN is : 508.00.
TP is : 23.00.
FP is : 52.00.
FN is : 15.00.
TN is : 210.00.
In [76]:
#Now the normalize the diagonal entries
cm_svm_full = cm_svm_train.astype('float') / cm_svm_train.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_svm_full.diagonal()
Out[76]:
array([0.97880539, 0.38728324])
In [77]:
#Now the normalize the diagonal entries
cm_svm_full = cm_svm_test.astype('float') / cm_svm_test.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
cm_svm_full.diagonal()
Out[77]:
array([0.93333333, 0.30666667])
In [78]:
plt.scatter(X_Test_new[svm_y_pred_test==0, 0] , X_Test_new[svm_y_pred_test==0, 1],c='m',marker='o',s=20,label='Class 0' )
plt.scatter(X_Test_new[svm_y_pred_test==1, 0] , X_Test_new[svm_y_pred_test==1, 1],c='b',marker='x',label='Class 1' )
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
plt.legend(loc='best')
plt.title('SVM')
plt.show()
In [79]:
#Neural Networks
In [80]:
# ANN
model =  Sequential()
model.add(Dense(units=8,activation='relu'))
model.add(Dropout(0.10))

model.add(Dense(units=4,activation='relu'))

model.add(Dense(units=1,activation='sigmoid'))

# compile ANN
model.compile(loss='binary_crossentropy', optimizer='adam',metrics = ['accuracy'])

#Fit the model
# Train ANN
model.fit(x=X_train_new, 
          y=y_train_new, 
          epochs=120,
          validation_data=(X_Test_new, y_test_new), verbose=1
          )
# initilize the model
model.summary()
Epoch 1/120
22/22 [==============================] - 2s 19ms/step - loss: 0.6476 - accuracy: 0.7500 - val_loss: 0.6353 - val_accuracy: 0.7500
Epoch 2/120
22/22 [==============================] - 0s 5ms/step - loss: 0.6279 - accuracy: 0.7500 - val_loss: 0.6149 - val_accuracy: 0.7500
Epoch 3/120
22/22 [==============================] - 0s 5ms/step - loss: 0.6038 - accuracy: 0.7500 - val_loss: 0.5954 - val_accuracy: 0.7500
Epoch 4/120
22/22 [==============================] - 0s 6ms/step - loss: 0.5830 - accuracy: 0.7500 - val_loss: 0.5815 - val_accuracy: 0.7500
Epoch 5/120
22/22 [==============================] - 0s 5ms/step - loss: 0.5730 - accuracy: 0.7500 - val_loss: 0.5698 - val_accuracy: 0.7500
Epoch 6/120
22/22 [==============================] - 0s 5ms/step - loss: 0.5562 - accuracy: 0.7500 - val_loss: 0.5592 - val_accuracy: 0.7500
Epoch 7/120
22/22 [==============================] - 0s 6ms/step - loss: 0.5502 - accuracy: 0.7500 - val_loss: 0.5506 - val_accuracy: 0.7500
Epoch 8/120
22/22 [==============================] - 0s 5ms/step - loss: 0.5448 - accuracy: 0.7500 - val_loss: 0.5421 - val_accuracy: 0.7500
Epoch 9/120
22/22 [==============================] - 0s 5ms/step - loss: 0.5289 - accuracy: 0.7500 - val_loss: 0.5345 - val_accuracy: 0.7500
Epoch 10/120
22/22 [==============================] - 0s 5ms/step - loss: 0.5313 - accuracy: 0.7500 - val_loss: 0.5266 - val_accuracy: 0.7500
Epoch 11/120
22/22 [==============================] - 0s 5ms/step - loss: 0.5243 - accuracy: 0.7500 - val_loss: 0.5192 - val_accuracy: 0.7500
Epoch 12/120
22/22 [==============================] - 0s 5ms/step - loss: 0.5069 - accuracy: 0.7500 - val_loss: 0.5116 - val_accuracy: 0.7500
Epoch 13/120
22/22 [==============================] - 0s 6ms/step - loss: 0.5079 - accuracy: 0.7500 - val_loss: 0.5044 - val_accuracy: 0.7500
Epoch 14/120
22/22 [==============================] - 0s 5ms/step - loss: 0.5010 - accuracy: 0.7500 - val_loss: 0.4974 - val_accuracy: 0.7500
Epoch 15/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4960 - accuracy: 0.7500 - val_loss: 0.4910 - val_accuracy: 0.7500
Epoch 16/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4914 - accuracy: 0.7500 - val_loss: 0.4848 - val_accuracy: 0.7500
Epoch 17/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4795 - accuracy: 0.7500 - val_loss: 0.4788 - val_accuracy: 0.7500
Epoch 18/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4803 - accuracy: 0.7514 - val_loss: 0.4735 - val_accuracy: 0.7500
Epoch 19/120
22/22 [==============================] - 0s 6ms/step - loss: 0.4670 - accuracy: 0.7558 - val_loss: 0.4678 - val_accuracy: 0.7433
Epoch 20/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4685 - accuracy: 0.7616 - val_loss: 0.4647 - val_accuracy: 0.7567
Epoch 21/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4650 - accuracy: 0.7645 - val_loss: 0.4617 - val_accuracy: 0.7567
Epoch 22/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4574 - accuracy: 0.7688 - val_loss: 0.4592 - val_accuracy: 0.7500
Epoch 23/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4594 - accuracy: 0.7572 - val_loss: 0.4571 - val_accuracy: 0.7533
Epoch 24/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4542 - accuracy: 0.7630 - val_loss: 0.4546 - val_accuracy: 0.7500
Epoch 25/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4560 - accuracy: 0.7746 - val_loss: 0.4530 - val_accuracy: 0.7533
Epoch 26/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4476 - accuracy: 0.7746 - val_loss: 0.4514 - val_accuracy: 0.7533
Epoch 27/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4544 - accuracy: 0.7702 - val_loss: 0.4503 - val_accuracy: 0.7600
Epoch 28/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4550 - accuracy: 0.7731 - val_loss: 0.4490 - val_accuracy: 0.7600
Epoch 29/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4497 - accuracy: 0.7803 - val_loss: 0.4485 - val_accuracy: 0.7567
Epoch 30/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4472 - accuracy: 0.7847 - val_loss: 0.4468 - val_accuracy: 0.7600
Epoch 31/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4470 - accuracy: 0.7890 - val_loss: 0.4459 - val_accuracy: 0.7633
Epoch 32/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4436 - accuracy: 0.7789 - val_loss: 0.4450 - val_accuracy: 0.7667
Epoch 33/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4443 - accuracy: 0.7962 - val_loss: 0.4446 - val_accuracy: 0.7633
Epoch 34/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4470 - accuracy: 0.7832 - val_loss: 0.4438 - val_accuracy: 0.7633
Epoch 35/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4474 - accuracy: 0.7861 - val_loss: 0.4432 - val_accuracy: 0.7667
Epoch 36/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4388 - accuracy: 0.7962 - val_loss: 0.4430 - val_accuracy: 0.7667
Epoch 37/120
22/22 [==============================] - 0s 6ms/step - loss: 0.4484 - accuracy: 0.7948 - val_loss: 0.4417 - val_accuracy: 0.7667
Epoch 38/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4359 - accuracy: 0.7890 - val_loss: 0.4414 - val_accuracy: 0.7667
Epoch 39/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4398 - accuracy: 0.7934 - val_loss: 0.4409 - val_accuracy: 0.7667
Epoch 40/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4410 - accuracy: 0.7847 - val_loss: 0.4410 - val_accuracy: 0.7633
Epoch 41/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4425 - accuracy: 0.7934 - val_loss: 0.4400 - val_accuracy: 0.7567
Epoch 42/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4392 - accuracy: 0.7934 - val_loss: 0.4397 - val_accuracy: 0.7533
Epoch 43/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4368 - accuracy: 0.7977 - val_loss: 0.4396 - val_accuracy: 0.7567
Epoch 44/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4404 - accuracy: 0.7934 - val_loss: 0.4396 - val_accuracy: 0.7600
Epoch 45/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4267 - accuracy: 0.7991 - val_loss: 0.4394 - val_accuracy: 0.7600
Epoch 46/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4443 - accuracy: 0.8020 - val_loss: 0.4384 - val_accuracy: 0.7533
Epoch 47/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4330 - accuracy: 0.8006 - val_loss: 0.4380 - val_accuracy: 0.7567
Epoch 48/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4373 - accuracy: 0.8092 - val_loss: 0.4376 - val_accuracy: 0.7567
Epoch 49/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4273 - accuracy: 0.7934 - val_loss: 0.4376 - val_accuracy: 0.7533
Epoch 50/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4338 - accuracy: 0.8006 - val_loss: 0.4370 - val_accuracy: 0.7600
Epoch 51/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4372 - accuracy: 0.8064 - val_loss: 0.4368 - val_accuracy: 0.7700
Epoch 52/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4328 - accuracy: 0.7991 - val_loss: 0.4365 - val_accuracy: 0.7700
Epoch 53/120
22/22 [==============================] - 0s 6ms/step - loss: 0.4357 - accuracy: 0.8049 - val_loss: 0.4364 - val_accuracy: 0.7667
Epoch 54/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4314 - accuracy: 0.8049 - val_loss: 0.4362 - val_accuracy: 0.7700
Epoch 55/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4337 - accuracy: 0.8049 - val_loss: 0.4361 - val_accuracy: 0.7700
Epoch 56/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4356 - accuracy: 0.7934 - val_loss: 0.4356 - val_accuracy: 0.7700
Epoch 57/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4201 - accuracy: 0.8136 - val_loss: 0.4355 - val_accuracy: 0.7700
Epoch 58/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4289 - accuracy: 0.7977 - val_loss: 0.4354 - val_accuracy: 0.7700
Epoch 59/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4299 - accuracy: 0.8006 - val_loss: 0.4355 - val_accuracy: 0.7733
Epoch 60/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4373 - accuracy: 0.7919 - val_loss: 0.4348 - val_accuracy: 0.7767
Epoch 61/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4258 - accuracy: 0.8064 - val_loss: 0.4350 - val_accuracy: 0.7700
Epoch 62/120
22/22 [==============================] - 0s 6ms/step - loss: 0.4219 - accuracy: 0.7962 - val_loss: 0.4349 - val_accuracy: 0.7733
Epoch 63/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4244 - accuracy: 0.7991 - val_loss: 0.4343 - val_accuracy: 0.7800
Epoch 64/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4265 - accuracy: 0.8092 - val_loss: 0.4350 - val_accuracy: 0.7733
Epoch 65/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4294 - accuracy: 0.7948 - val_loss: 0.4346 - val_accuracy: 0.7733
Epoch 66/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4254 - accuracy: 0.8078 - val_loss: 0.4336 - val_accuracy: 0.7767
Epoch 67/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4291 - accuracy: 0.8136 - val_loss: 0.4334 - val_accuracy: 0.7800
Epoch 68/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4251 - accuracy: 0.8078 - val_loss: 0.4341 - val_accuracy: 0.7733
Epoch 69/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4293 - accuracy: 0.8035 - val_loss: 0.4334 - val_accuracy: 0.7833
Epoch 70/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4275 - accuracy: 0.8136 - val_loss: 0.4331 - val_accuracy: 0.7767
Epoch 71/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4281 - accuracy: 0.8150 - val_loss: 0.4330 - val_accuracy: 0.7733
Epoch 72/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4245 - accuracy: 0.7991 - val_loss: 0.4329 - val_accuracy: 0.7800
Epoch 73/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4266 - accuracy: 0.8136 - val_loss: 0.4325 - val_accuracy: 0.7833
Epoch 74/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4204 - accuracy: 0.8223 - val_loss: 0.4326 - val_accuracy: 0.7767
Epoch 75/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4312 - accuracy: 0.8035 - val_loss: 0.4333 - val_accuracy: 0.7733
Epoch 76/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4261 - accuracy: 0.8064 - val_loss: 0.4323 - val_accuracy: 0.7767
Epoch 77/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4244 - accuracy: 0.8064 - val_loss: 0.4321 - val_accuracy: 0.7767
Epoch 78/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4243 - accuracy: 0.8150 - val_loss: 0.4318 - val_accuracy: 0.7867
Epoch 79/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4269 - accuracy: 0.8078 - val_loss: 0.4320 - val_accuracy: 0.7800
Epoch 80/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4202 - accuracy: 0.8121 - val_loss: 0.4318 - val_accuracy: 0.7867
Epoch 81/120
22/22 [==============================] - 0s 6ms/step - loss: 0.4206 - accuracy: 0.8020 - val_loss: 0.4320 - val_accuracy: 0.7867
Epoch 82/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4122 - accuracy: 0.8223 - val_loss: 0.4317 - val_accuracy: 0.7867
Epoch 83/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4201 - accuracy: 0.8136 - val_loss: 0.4319 - val_accuracy: 0.7867
Epoch 84/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4180 - accuracy: 0.8107 - val_loss: 0.4318 - val_accuracy: 0.7767
Epoch 85/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4207 - accuracy: 0.8107 - val_loss: 0.4312 - val_accuracy: 0.7867
Epoch 86/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4202 - accuracy: 0.8237 - val_loss: 0.4312 - val_accuracy: 0.7833
Epoch 87/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4148 - accuracy: 0.8150 - val_loss: 0.4317 - val_accuracy: 0.7833
Epoch 88/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4159 - accuracy: 0.8150 - val_loss: 0.4315 - val_accuracy: 0.7833
Epoch 89/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4206 - accuracy: 0.8006 - val_loss: 0.4311 - val_accuracy: 0.7833
Epoch 90/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4110 - accuracy: 0.8194 - val_loss: 0.4312 - val_accuracy: 0.7833
Epoch 91/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4166 - accuracy: 0.8092 - val_loss: 0.4320 - val_accuracy: 0.7833
Epoch 92/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4115 - accuracy: 0.8150 - val_loss: 0.4310 - val_accuracy: 0.7867
Epoch 93/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4131 - accuracy: 0.8092 - val_loss: 0.4312 - val_accuracy: 0.7833
Epoch 94/120
22/22 [==============================] - 0s 6ms/step - loss: 0.4125 - accuracy: 0.8092 - val_loss: 0.4310 - val_accuracy: 0.7867
Epoch 95/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4091 - accuracy: 0.8179 - val_loss: 0.4314 - val_accuracy: 0.7833
Epoch 96/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4167 - accuracy: 0.8121 - val_loss: 0.4311 - val_accuracy: 0.7833
Epoch 97/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4112 - accuracy: 0.8208 - val_loss: 0.4309 - val_accuracy: 0.7800
Epoch 98/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4125 - accuracy: 0.8121 - val_loss: 0.4310 - val_accuracy: 0.7867
Epoch 99/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4146 - accuracy: 0.8165 - val_loss: 0.4312 - val_accuracy: 0.7867
Epoch 100/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4093 - accuracy: 0.8136 - val_loss: 0.4309 - val_accuracy: 0.7800
Epoch 101/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4156 - accuracy: 0.8121 - val_loss: 0.4307 - val_accuracy: 0.7800
Epoch 102/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4057 - accuracy: 0.8107 - val_loss: 0.4308 - val_accuracy: 0.7833
Epoch 103/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4146 - accuracy: 0.8107 - val_loss: 0.4308 - val_accuracy: 0.7833
Epoch 104/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4079 - accuracy: 0.8237 - val_loss: 0.4307 - val_accuracy: 0.7933
Epoch 105/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4069 - accuracy: 0.8107 - val_loss: 0.4306 - val_accuracy: 0.7933
Epoch 106/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4067 - accuracy: 0.8092 - val_loss: 0.4308 - val_accuracy: 0.7833
Epoch 107/120
22/22 [==============================] - 0s 6ms/step - loss: 0.4084 - accuracy: 0.8136 - val_loss: 0.4305 - val_accuracy: 0.7933
Epoch 108/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4137 - accuracy: 0.8150 - val_loss: 0.4304 - val_accuracy: 0.7933
Epoch 109/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4087 - accuracy: 0.8208 - val_loss: 0.4308 - val_accuracy: 0.7867
Epoch 110/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4033 - accuracy: 0.8194 - val_loss: 0.4308 - val_accuracy: 0.7900
Epoch 111/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4109 - accuracy: 0.8194 - val_loss: 0.4305 - val_accuracy: 0.7933
Epoch 112/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4056 - accuracy: 0.8121 - val_loss: 0.4306 - val_accuracy: 0.7900
Epoch 113/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4093 - accuracy: 0.8165 - val_loss: 0.4303 - val_accuracy: 0.8000
Epoch 114/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4080 - accuracy: 0.8121 - val_loss: 0.4305 - val_accuracy: 0.8000
Epoch 115/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4105 - accuracy: 0.8150 - val_loss: 0.4305 - val_accuracy: 0.8000
Epoch 116/120
22/22 [==============================] - 0s 6ms/step - loss: 0.3980 - accuracy: 0.8237 - val_loss: 0.4308 - val_accuracy: 0.8000
Epoch 117/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4015 - accuracy: 0.8121 - val_loss: 0.4307 - val_accuracy: 0.7967
Epoch 118/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4055 - accuracy: 0.8136 - val_loss: 0.4309 - val_accuracy: 0.7967
Epoch 119/120
22/22 [==============================] - 0s 4ms/step - loss: 0.4038 - accuracy: 0.8165 - val_loss: 0.4313 - val_accuracy: 0.7967
Epoch 120/120
22/22 [==============================] - 0s 5ms/step - loss: 0.4079 - accuracy: 0.8251 - val_loss: 0.4316 - val_accuracy: 0.7967
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense (Dense)               (None, 8)                 88        
                                                                 
 dropout (Dropout)           (None, 8)                 0         
                                                                 
 dense_1 (Dense)             (None, 4)                 36        
                                                                 
 dense_2 (Dense)             (None, 1)                 5         
                                                                 
=================================================================
Total params: 129
Trainable params: 129
Non-trainable params: 0
_________________________________________________________________
In [81]:
# model history to df
loss_plot = pd.DataFrame(model.history.history)
accuracy_plot = pd.DataFrame(model.history.history)

#  accuracy and loss plot
fig, (ax1, ax2) = plt.subplots(1, 2,figsize=(14,4))
plt.style.use('seaborn')
ax1.plot(loss_plot.loc[:, ['loss']], label='Training loss');
ax1.plot(loss_plot.loc[:, ['val_loss']],label='Validation loss');
ax1.set_title('Training and Validation loss')
ax1.set_xlabel('epochs')
ax1.set_ylabel('Loss')
ax1.legend(loc="best");

ax2.plot(accuracy_plot.loc[:, ['accuracy']],label='Training_accuracy');
ax2.plot(accuracy_plot.loc[:, ['val_accuracy']], label='Validation_accuracy');
ax2.set_title('Training_and_Validation_accuracy');
ax2.set_xlabel('epochs')
ax2.set_ylabel('accuracy')
ax2.legend(loc="best");
In [82]:
y_pred_train = model.predict(X_train_new)
y_pred_test = model.predict(X_Test_new)
22/22 [==============================] - 0s 2ms/step
10/10 [==============================] - 0s 2ms/step
In [83]:
y_pred_test = np.round(abs(y_pred_test))
y_pred_train = np.round(abs(y_pred_train))
In [84]:
# now check for both train and test data, how well the model learned the patterns
ann_acc_train = accuracy_score(y_train_new, y_pred_train)
ann_acc_test = accuracy_score(y_test_new, y_pred_test)
ann_pre_train = precision_score(y_train_new, y_pred_train, average='macro')
ann_pre_test = precision_score(y_test_new, y_pred_test, average='macro')
ann_rec_train = recall_score(y_train_new, y_pred_train, average='macro')
ann_rec_test = recall_score(y_test_new, y_pred_test, average='macro')
ann_f1_train = f1_score(y_train_new, y_pred_train, average='macro')
ann_f1_test = f1_score(y_test_new, y_pred_test, average='macro')

#print the scores
print('Accuracy scores of ANN classifier are:',
      'train: {:.2f}'.format(ann_acc_train), 'and test: {:.2f}.'.format(ann_acc_test))
print('Precision scores of ANN classifier are:',
      'train: {:.2f}'.format(ann_pre_train), 'and test: {:.2f}.'.format(ann_pre_test))
print('Recall scores of ANN classifier are:',
      'train: {:.2f}'.format(ann_rec_train), 'and test: {:.2f}.'.format(ann_rec_test))
print('F1 scores of ANN classifier are:',
      'train: {:.2f}'.format(ann_f1_train), 'and test: {:.2f}.'.format(ann_f1_test))
Accuracy scores of ANN classifier are: train: 0.83 and test: 0.80.
Precision scores of ANN classifier are: train: 0.78 and test: 0.73.
Recall scores of ANN classifier are: train: 0.72 and test: 0.71.
F1 scores of ANN classifier are: train: 0.74 and test: 0.72.
In [85]:
print(classification_report(y_test_new, np.round(abs(y_pred_test))))
              precision    recall  f1-score   support

           0       0.85      0.88      0.87       225
           1       0.61      0.53      0.57        75

    accuracy                           0.80       300
   macro avg       0.73      0.71      0.72       300
weighted avg       0.79      0.80      0.79       300

In [86]:
ann_acc_train
Out[86]:
0.8251445086705202
In [102]:
# Creates a confusion matrix
ann_train = confusion_matrix(y_train_new, y_pred_train)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(ann_train, annot=True, fmt=".1f")
plt.title('ANN train set\nAccuracy:{0:.3f}'.format(accuracy_score(y_train_new, y_pred_train)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [104]:
# Creates a confusion matrix
ann_test = confusion_matrix(y_test_new, y_pred_test)
#plots the confusion matrix
plt.figure(figsize=(6,5))
sns.heatmap(ann_test, annot=True, fmt=".1f")
plt.title('ANN test set\nAccuracy:{0:.3f}'.format(accuracy_score(y_test_new, y_pred_test)))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
In [105]:
#training 
TP_ann_train = ann_train[1][1]
FP_ann_train = ann_train[1][0]
FN_ann_train = ann_train[0][1]
TN_ann_train = ann_train[0][0]
print('TP is : {:.2f}.'.format(TP_ann_train))
print('FP is : {:.2f}.'.format(FP_ann_train))
print('FN is : {:.2f}.'.format(FN_ann_train))
print('TN is : {:.2f}.'.format(TN_ann_train))

#test
TP_ann_test = ann_test[1][1]
FP_ann_test = ann_test[1][0]
FN_ann_test = ann_test[0][1]
TN_ann_test = ann_test[0][0]
print('TP is : {:.2f}.'.format(TP_ann_test))
print('FP is : {:.2f}.'.format(FP_ann_test))
print('FN is : {:.2f}.'.format(FN_ann_test))
print('TN is : {:.2f}.'.format(TN_ann_test))
TP is : 86.00.
FP is : 87.00.
FN is : 34.00.
TN is : 485.00.
TP is : 40.00.
FP is : 35.00.
FN is : 26.00.
TN is : 199.00.
In [90]:
#Now the normalize the diagonal entries
ann_acc_train_full = ann_train.astype('float') / ann_train.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
ann_acc_train_full.diagonal()
Out[90]:
array([0.9344894 , 0.49710983])
In [91]:
#Now the normalize the diagonal entries
ann_acc_test_full = ann_test.astype('float') / ann_test.sum(axis=1)[:, np.newaxis]

#The diagonal entries are the accuracies of each class
ann_acc_test_full.diagonal()
Out[91]:
array([0.88444444, 0.53333333])
In [92]:
training_samples = len(y_train_new)
training_samples_test = len(y_test_new)

results = pd.DataFrame({'Classifier_Name':['Linear Discriminant Analysis', 'Linear Discriminant Analysis', 'Logistic Regression', 'Logistic Regression', 'Decision Trees', 'Decision Trees', 'k-Nearest Neighbors', 'k-Nearest Neighbors', 'Naïve Bayes', 'Naïve Bayes', 'Support Vector Machines', 'Support Vector Machines', 'Neural Networks', 'Neural Networks' ],
    'Training_or_test_set':['Training set', 'Test set', 'Training set', 'Test set', 'Training set', 'Test set', 'Training set', 'Test set', 'Training set', 'Test set', 'Training set', 'Test set', 'Training set', 'Test set'],
    'Number_of_training_samples':[training_samples, training_samples_test, training_samples, training_samples_test, training_samples, training_samples_test, training_samples, training_samples_test, training_samples, training_samples_test, training_samples, training_samples_test, training_samples, training_samples_test],
    'Number_of_non_healthy_companies_in_training_sample': [non_healthy_training, non_healthy_test, non_healthy_training, non_healthy_test, non_healthy_training, non_healthy_test, non_healthy_training, non_healthy_test, non_healthy_training, non_healthy_test, non_healthy_training, non_healthy_test, non_healthy_training, non_healthy_test,],
                       'TP': [TP_lda_train, TP_lda_test, TP_log_train, TP_log_test, TP_dt_train, TP_dt_test, TP_knn_train, TP_knn_test, TP_nb_train, TP_nb_test, TP_svm_train, TP_svm_test, TP_ann_train, TP_ann_test], 
                       'TN': [TN_lda_train, TN_lda_test, TN_log_train, TN_log_test, TN_dt_train, TN_dt_test, TN_knn_train, TN_knn_test, TN_nb_train, TN_nb_test, TN_svm_train, TN_svm_test, TN_ann_train, TN_ann_test]  ,
                       'FP': [FP_lda_train, FP_lda_test, FP_log_train, FP_log_test, FP_dt_train, FP_dt_test, FP_knn_train, FP_knn_test, FP_nb_train, FP_nb_test, FP_svm_train, FP_svm_test, FP_ann_train, FP_ann_test],
                       'FN': [FN_lda_train, FN_lda_test, FN_log_train, FN_log_test, FN_dt_train, FN_dt_test, FN_knn_train, FN_knn_test, FN_nb_train, FN_nb_test, FN_svm_train, FN_svm_test, FN_ann_train, FN_ann_test],
                       'Precision': [pre_train_lda, pre_test_lda, pre_train_log, pre_test_log, pre_train_dt, pre_test_dt, knn_pre_train_dt, knn_pre_test_dt, nb_pre_train, nb_pre_test, svm_pre_train, svm_pre_test, ann_pre_train, ann_pre_test],
                       'Recall': [rec_train_lda, rec_test_lda, rec_train_log, rec_test_log, rec_train_dt, rec_test_dt, knn_rec_train_dt, knn_rec_test_dt, nb_rec_train, nb_rec_test, svm_rec_train, svm_rec_test, ann_rec_train, ann_rec_test],
                       'F1 score': [f1_train_lda, f1_test_lda, f1_train_log, f1_test_log, f1_train_dt, f1_test_dt, knn_f1_train_dt, knn_f1_test_dt, nb_f1_train, nb_f1_test, svm_f1_train, svm_f1_test, ann_f1_train, ann_f1_test],
                       'Accuracy': [acc_train_lda, acc_test_lda, acc_train_log, acc_test_log, acc_train_dt, acc_test_dt, knn_acc_train_dt, knn_acc_test_dt, nb_acc_train, nb_acc_test, svm_acc_train, svm_acc_test, ann_acc_train, ann_acc_test]
                      })
#create directory to store the output file
outdir = './OutputData'
if not os.path.exists(outdir):
    os.mkdir(outdir)
results.to_excel('OutputData\\Results.xlsx')
In [93]:
results
Out[93]:
Classifier_Name Training_or_test_set Number_of_training_samples Number_of_non_healthy_companies_in_training_sample TP TN FP FN Precision Recall F1 score Accuracy
0 Linear Discriminant Analysis Training set 692 173 83 484 90 35 0.703390 0.479769 0.570447 0.819364
1 Linear Discriminant Analysis Test set 300 75 30 200 45 25 0.545455 0.400000 0.461538 0.766667
2 Logistic Regression Training set 692 173 66 491 107 28 0.761599 0.663776 0.686761 0.804913
3 Logistic Regression Test set 300 75 27 205 48 20 0.692372 0.635556 0.650182 0.773333
4 Decision Trees Training set 692 173 173 519 0 0 1.000000 1.000000 1.000000 1.000000
5 Decision Trees Test set 300 75 44 181 31 44 0.676887 0.695556 0.684126 0.750000
6 k-Nearest Neighbors Training set 692 173 104 491 69 28 0.832332 0.773603 0.796035 0.859827
7 k-Nearest Neighbors Test set 300 75 26 199 49 26 0.651210 0.615556 0.625443 0.750000
8 Naïve Bayes Training set 692 173 82 453 91 66 0.693387 0.673410 0.681604 0.773121
9 Naïve Bayes Test set 300 75 42 193 33 32 0.710775 0.708889 0.709817 0.783333
10 Support Vector Machines Training set 692 173 67 508 106 11 0.843168 0.683044 0.715299 0.830925
11 Support Vector Machines Test set 300 75 23 210 52 15 0.703395 0.620000 0.634751 0.776667
12 Neural Networks Training set 692 173 86 485 87 34 0.782284 0.715800 0.738062 0.825145
13 Neural Networks Test set 300 75 40 199 35 26 0.728244 0.708889 0.717239 0.796667
In [ ]: